

#Attach the packages this script relies on further down:
#lubridate for day<-, month(), year(), %m-% and months(); stringr for str_detect().
#library() does nothing if the package is already attached, so this is safe
#when the script is run from a session that loaded them earlier.
library(lubridate)
library(stringr)

#Load the merged recall data; the rest of the script expects it to provide `Data`
load("Recalls_merged_2019.RData")
Data$kenteken <- as.character(Data$kenteken)

#Remove duplicates
#Every kenteken that occurs more than once is dropped entirely:
#all of its rows are removed, not only the repeated ones
Data$dupl <- duplicated(Data$kenteken)
kentekendupl <- subset(Data, dupl == TRUE, select = kenteken)
Data <- subset(Data, !(kenteken %in% kentekendupl$kenteken))
Data$dupl <- NULL

#Prepare for counting variables
#num is a constant 1 per car so that summing it later yields the number of cars
Data$num <- 1

#Is the car recalled AND only from Nov 2017?
#(%in% TRUE maps both FALSE and NA, e.g. a missing publication date, to 0)
Data$rec <- as.numeric(
  (!is.na(Data$referentiecoderdw) & Data$publicatiedatumrdw >= "2017-11-01") %in% TRUE
)

#Is the car recalled and BEFORE Nov 2017?
Data$rec_bef <- as.numeric(
  (!is.na(Data$referentiecoderdw) & Data$publicatiedatumrdw < "2017-11-01") %in% TRUE
)



#Calculate number of total cars and recently recalled cars within each type-variant-version
#(one output row per combination; num, rec and rec_bef are summed within it)
Temp <- aggregate(
  cbind(num, rec, rec_bef) ~ typegoedkeuringsnummer + variant + uitvoering,
  data = Data,
  FUN = "sum"
)
#Version ok if:
#1) Total cars > recalled cars
#2) At least one recalled car
#3) Type approval code present
#4) No cars with recalls BEFORE Nov 2017

Temp$ok <- with(
  Temp,
  typegoedkeuringsnummer != "" & rec_bef == 0 & rec > 0 & num > rec
)
table(Temp$ok)
#5643 versions with recalls and within variation and without cars with earlier recalls


##6211 versions with recalls and within variation, also including those with cars with earlier recalls
#Temp$ok1<- Temp$num>Temp$rec & Temp$rec>0 & Temp$typegoedkeuringsnummer!=""
#table(Temp$ok1)

#Create unique identifier for version with variation in recalls
#(type approval, variant and version pasted together with "_")
varvariantver <- with(
  Temp[Temp$ok == TRUE, ],
  paste(typegoedkeuringsnummer, variant, uitvoering, sep = "_")
)
Data$typevarver <- with(
  Data,
  paste(typegoedkeuringsnummer, variant, uitvoering, sep = "_")
)

#Does the version belong with those with sufficient variation in recalls?
Data$var_type_var_ver1 <- is.element(Data$typevarver, varvariantver)



table(Data$var_type_var_ver1)
#513,919 total cars...
table(Data$var_type_var_ver1 & Data$rec == TRUE)
#Of which 282,687 (55.00%) are recalled

table(
  Data$var_type_var_ver1 &
    Data$rec == TRUE &
    !is.na(Data$recall_fixed) &
    str_detect(Data$datumtenaamstelling_all, "201[89]")
)

#Remove unnecessary variables
#NOTE(review): rec_bef is left in Data here - confirm whether that is intended
rm(varvariantver, Temp, kentekendupl)
Data$typevarver <- NULL
Data$num <- NULL
Data$rec <- NULL

#Get only cars belonging to versions with sufficient variation in recalls
Split <- Data[which(Data$var_type_var_ver1), ]
rm(Data)

#Get monthly sequence (first of each month, Nov 2017 up to and including Mar 2019)
panel_months <- seq(as.Date("2017/11/1"), as.Date("2019/03/01"), "months")


#Create base panel: one row for every car in every month
Panel <- setNames(
  expand.grid(Split$kenteken, panel_months),
  c("kenteken", "date")
)
rm(panel_months)



#Get date first registration, in monthly format (date changed to 1)
#HERE ALSO ADD TYPE APPROVAL, VARIANT, VERSION
first_reg <- subset(
  Split,
  select = c(
    "kenteken", "typegoedkeuringsnummer", "variant", "uitvoering",
    "datumeersteafgiftenederland", "datumeerstetoelating"
  )
)
#Day-of-month set to 1 so these dates are comparable with the monthly Panel$date
day(first_reg$datumeersteafgiftenederland) <- 1
day(first_reg$datumeerstetoelating) <- 1
#Merge
Panel <- merge(Panel, first_reg, by = "kenteken", all.x = TRUE)
#New cars already removed in 1d
rm(first_reg)

#Sort by car, then by month
Panel <- Panel[with(Panel, order(kenteken, date)), ]

#Remove dates after car was exported abroad
Temp <- subset(Split, select = c("kenteken", "exported_last"))
#Now the month is the month the change took place, not the month after
Temp$exported_last <- as.Date(Temp$exported_last, format = "%Y-%m-%d")
Temp$exported_last <- Temp$exported_last %m-% months(1)
day(Temp$exported_last) <- 1
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
#Keep a row when the car has no export date, or the month is not after the export month
Panel <- Panel[which(is.na(Panel$exported_last) | Panel$date <= Panel$exported_last), ]




#Get resale dates
Temp <- subset(Split, select = c("kenteken", "datumtenaamstelling_all"))
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)

#Dummy for a resale in the panel month: 1 when datumtenaamstelling_all contains
#the row's month/year, 0 otherwise (also 0 when the string is missing).
#The month must not be preceded by another digit: a bare "1/2018" would also
#match inside "11/2018" (and "2/2018" inside "12/2018"), wrongly flagging
#January/February rows for November/December resales.
#"0?" keeps zero-padded months such as "01/2018" matching.
#NOTE(review): assumes the dates in datumtenaamstelling_all are written with
#the month directly followed by "/yyyy" - confirm against the data
Panel$resale <- 0
i <- str_detect(
  Panel$datumtenaamstelling_all,
  paste0("(^|[^0-9])0?", month(Panel$date), "/", year(Panel$date))
)
Panel$resale[i] <- 1




#Get recall dates and recall fixing dates
Temp <- subset(Split, select = c(kenteken, referentiecoderdw, recall_new, recall_fixed))
Panel <- merge(Panel, Temp, by = "kenteken", all.x = TRUE, all.y = FALSE)
#Day-of-month set to 1 so both dates are comparable with the monthly Panel$date
day(Panel$recall_new) <- 1
day(Panel$recall_fixed) <- 1

#Dummy for date of recall (1 only in the month of the recall)
i <- !is.na(Panel$recall_new) & Panel$recall_new == Panel$date
Panel$rec_new <- as.numeric(i %in% TRUE)
#Dummy for recall occurred (over time): 1 from the recall month onwards
i <- !is.na(Panel$recall_new) & Panel$recall_new <= Panel$date
Panel$rec_new_t <- as.numeric(i %in% TRUE)

#Generate number of months from recall
#The recall month itself counts as 1; the month before it as 0
Panel$rec_new_dist <- 1 +
  (12 * year(Panel$date) + month(Panel$date)) -
  (12 * year(Panel$recall_new) + month(Panel$recall_new))
#_both keeps negative distances (months before the recall); missing recall date -> 0
Panel$rec_new_dist_both <- replace(Panel$rec_new_dist, is.na(Panel$rec_new_dist), 0)
#rec_new_dist itself is floored at 0 (negative and missing distances become 0)
Panel$rec_new_dist <- pmax(Panel$rec_new_dist_both, 0)

#Distance in categories: 0 = distance 0, 1 = months 1-3, 2 = 4-6, 3 = 7-9, 4 = 10-12, 5 = 13+
Panel$rec_new_dist_cat <- as.numeric(
  findInterval(Panel$rec_new_dist, c(1, 4, 7, 10, 13))
)
table(Panel$rec_new_dist_cat)

#Dummy for date of fixing (1 only in the month of the fix)
i <- !is.na(Panel$recall_fixed) & Panel$recall_fixed == Panel$date
Panel$rec_fixed <- as.numeric(i %in% TRUE)
#Dummy for fixing occurred (over time): 1 from the fixing month onwards
i <- !is.na(Panel$recall_fixed) & Panel$recall_fixed <= Panel$date
Panel$rec_fixed_t <- as.numeric(i %in% TRUE)

#Generate id for type-variant-version
#(numeric code of the factor built from the pasted type_variant_version string)
Panel$id_ver <- as.numeric(as.factor(
  paste(Panel$typegoedkeuringsnummer, Panel$variant, Panel$uitvoering, sep = "_")
))

#Sort by version id, then car, then month.
#The column is referenced by its full name: `Panel$id` only worked through
#partial matching of `$` and would break as soon as another column starting
#with "id" is added.
Panel <- Panel[order(Panel$id_ver, Panel$kenteken, Panel$date), ]


Panel$kenteken <- as.character(Panel$kenteken)

#Drop obs if date<date car gets in NL
#i is TRUE for months before the car's datumeersteafgiftenederland
i <- Panel$date < Panel$datumeersteafgiftenederland
table(i, useNA = "always")
#which() keeps only rows where i is FALSE; rows where i is NA are dropped as well
Panel <- Panel[which(!i), ]
rm(i)

#Generate variable on age of car
#(whole years between datumeerstetoelating and the panel month, rounded down)
Panel$age_car <- with(
  Panel,
  floor(
    (12 * (year(date) - year(datumeerstetoelating)) +
      month(date) - month(datumeerstetoelating)) / 12
  )
)
#Generate variable on time in NL
#(whole years between datumeersteafgiftenederland and the panel month, rounded down)
Panel$time_NL <- with(
  Panel,
  floor(
    (12 * (year(date) - year(datumeersteafgiftenederland)) +
      month(date) - month(datumeersteafgiftenederland)) / 12
  )
)




#Write the finished panel to disk
save(Panel, file = "Panel.RData")